##Maybe this is a case where convergence is assessed by one version of variable, other version used for poststratification?


## "Measuring Political Support and Issue Ownership Using Endorsement Experiments,
## with Application to Militant Groups in Pakistan"

library(foreign)
library(R2jags)

setwd("H:/Research Projects/Political Endorsements/")
load("data/pakistan.RData")
load("data/divisions.RData")

Y <- array(NA, c(nrow(pakistan),4,5))
Y[,1,1] <- pakistan$Polio.a
Y[,1,2] <- pakistan$Polio.b
Y[,1,3] <- pakistan$Polio.c
Y[,1,4] <- pakistan$Polio.d
Y[,1,5] <- pakistan$Polio.e
Y[,2,1] <- pakistan$Durand.a
Y[,2,2] <- pakistan$Durand.b
Y[,2,3] <- pakistan$Durand.c
Y[,2,4] <- pakistan$Durand.d
Y[,2,5] <- pakistan$Durand.e
Y[,3,1] <- pakistan$FCR.a
Y[,3,2] <- pakistan$FCR.b
Y[,3,3] <- pakistan$FCR.c
Y[,3,4] <- pakistan$FCR.d
Y[,3,5] <- pakistan$FCR.e
Y[,4,1] <- pakistan$Curriculum.a
Y[,4,2] <- pakistan$Curriculum.b
Y[,4,3] <- pakistan$Curriculum.c
Y[,4,4] <- pakistan$Curriculum.d
Y[,4,5] <- pakistan$Curriculum.e

N <- dim(Y)[1] 
J <- 4 ## number of issue areas / policies
K <- 5 ## number of treatment groups, including the control group

Y2 <- matrix(NA,N,J)
endorser <- matrix(NA, N, J)
for (i in 1:N){
for (j in 1:J){
  endorser[i,j] <- (1:5)[!is.na(Y[i,j,])]
  Y2[i,j] <- Y[i,j,endorser[i,j]]
}}
Y <- Y2


division <- pakistan$division 
province <- divisions$province.number
edu <- pakistan$edu
inc <- pakistan$inc
female <- pakistan$female
rural <- pakistan$rural
n.inc <- 3
n.edu <- 5
n.division <- max(divisions$division.number)
n.province <- max(divisions$province.number)

Prec.Div <- diag(.001,K-1)


##########################
##  The Division Model  ##
##########################
data <- list ("N", "J", "K", "Y", "endorser", 
	"division", "province", "n.division", "n.province")
inits <- function() {list(
  alpha0=matrix(seq(-2,2,length.out=(4*J)),J,4), 
  beta0=runif(J), x=rnorm(N), s=endorser-1, 
  delta.division0=rnorm(n.division), delta.province=rnorm(n.province),
  lambda.division0=matrix(c(rep(0,n.division), rep(0,n.division*(K-1))), n.division, K), theta.province=matrix(c(rep(0,n.province), rnorm(n.province*(K-1))), n.province, K),
  omega=c(0,runif(K-1)), sigma=runif(n.province), psi=matrix(c(rep(0,n.province), runif(n.province*(K-1))), n.province, K)
  )}
params <- c("beta", "cor.sx", "mean.correct", "delta.division", "lambda.division") 
model <- "code/JAGSmodels/ModelDivision.txt" 
fit <- jags(data, inits, params, model.file=model, n.iter=40000) #takes quite a while -- several days for convergence
fit <- autojags(fit, n.iter=65000, n.thin=100, Rhat=1.02, n.update=3)
fit
#save.image("data/Division2011.RData")
## mean(mean.correct) = 0.4192, though not yet converged


#####################################################
##  The Division Model with Individual Covariates  ##
#####################################################
data <- list ("N", "J", "K", "Y", "endorser",
	"female", "rural", "inc", "edu", "n.inc", "n.edu",
	"division", "province", "n.division", "n.province")
inits <- function() {list(
  alpha0=matrix(seq(-2,2,length.out=(J*4)), J,4), beta0=rep(1,J), x=rnorm(N), s=endorser-1, 
  delta.female0=0.5, delta.rural0=0.5, delta.inc0=rnorm(n.inc), delta.edu0=rnorm(n.edu), delta.division0=rep(0,n.division), delta.province=rep(0,n.province),
  theta.female0=c(0,rnorm(K-1)), theta.rural0=c(0,rnorm(K-1)), theta.inc0=matrix(c(rep(0,n.inc), rep(0,n.inc*(K-1))), n.inc, K), theta.edu0=matrix(c(rep(0,5), rep(0,n.edu*(K-1))), n.edu, K),
  lambda.division0=matrix(c(rep(0,n.division), rep(0,n.division*(K-1))), n.division, K), theta.province=matrix(c(rep(0,n.province), rep(0,n.province*(K-1))), n.province, K),
  omega=c(0,runif(K-1)), sigma=runif(n.province), phi=matrix(c(rep(0,n.province), runif(n.province*(K-1))), n.province, K)
  )}
params <- c("beta", "cor.sx", "mean.correct",
  "delta.female", "delta.rural", "delta.inc", "delta.edu", "delta.division", 
  "theta.female", "theta.rural", "theta.inc", "theta.edu", "lambda.division") 
model <- "code/JAGSmodels/ModelDivisionWithICovariates.txt" 
fit2 <- jags(data, inits, params, model.file=model, n.iter=40000) #each thousand takes 2.2 hours
fit2 <- autojags(fit2, n.iter=65000, n.thin=100, Rhat=1.02, n.update=3) #takes quite a while -- several weeks for convergence
fit2
#save.image("data/DivisionWithICovariates2011.RData")
## mean(mean.correct) = 0.4185




#############################################################
##  Produce .dta file for STATA
##  Run Ordered Probit with Errors Clustered by Respondent
#############################################################


y <- c(Y[,1], Y[,2], Y[,3], Y[,4])
dat <- as.data.frame(y)
dat$endorser <- c(endorser[,1], endorser[,2], endorser[,3], endorser[,4])
dat$division <- c(division, division, division, division)
dat$edu <- c(edu, edu, edu, edu)
dat$inc <- c(inc, inc, inc, inc)
dat$female <- c(female, female, female, female)
dat$rural <- c(rural, rural, rural, rural)
dat$id <- rep(1:length(edu), 4)
dat$question <- c( rep(1,length(edu)), rep(2,length(edu)), rep(3,length(edu)), rep(4,length(edu)) )
write.dta(dat, file="data/dat.dta")

## Code to use in STATA
use "H:\Research Projects\Political Endorsements\data\dat.dta", clear
oprobit y i.question endorser##division, cluster(id)
oprobit y i.question endorser##division endorser##edu endorser##inc endorser##female endorser##rural, cluster(id)
predict yhat1 yhat2 yhat3 yhat4 yhat5
outsheet yhat1 yhat2 yhat3 yhat4 yhat5 using "H:\Research Projects\Political Endorsements\data\STATAfitted.csv", comma replace


## Back in R, evaluating quality of fits
stata.fitted <- read.csv("data/STATAfitted.csv", header=T)
## Define deviation as difference from correct probability
## Standard being modal response -- 5 for each of the issue questions
correctly.classfied.null <- y==5
correct.classification.null <- mean(correctly.classfied.null )

correctly.classified.oprobit <- rep(NA, length(y))
for (i in 1:length(y)){
  correctly.classified.oprobit[i] <- y[i]==which(stata.fitted[i,]==max(stata.fitted[i,]))
  }
correct.classification.oprobit <- mean(correctly.classified.oprobit)
(correct.classification.oprobit - correct.classification.null) / (1 - correct.classification.null) #Proportional reduction in error (theoretical maximum of 1.00)

correctly.classified.ideal <- rep(NA, length(y))

correctly.classified.idealwithcovariates <- rep(NA, length(y))